{
####################################
#Note:
# 76/276/70, 476: HAD ez-passes
# 79, 80, 81, : NO EZPass
####################################

########################
#Initial setup here here
########################

#external libraries
rm( list = ls() )
library(ggplot2)
library(stargazer)
library(MatchIt)
library(AER);
library(ivpack) # devtools::install_github('cran/ivpack')
setwd("~/Dropbox/Secondary Level/Collaborations/Traffic/project_analysis/dataverse_replication")
source("./helperCode/master_helper_functions.R")

#load in data
my_data <- read.csv(file = "./EZPassData_clean.csv" )
my_data_with_campaign_finance <- read.csv(file = "./EZPassData_clean_with_campaign_finance.csv" )

#did w Sds
source("./helperCode/causal_effect_estimation/dd_with_linearSDs_function.R")
inclusion_rule <- 12; exclusion_rule <- 18
treatment_basis_name <- "EZPlaza_TravelDistOptimizedDist_Total_Leng"
control_basis_name <- c("minNetworkOptimizedDistance_Distance_nonEZ_hwy")
fixed_effect_names <- c("County")

#matching choices
my_replace <- F ; my_discard <- "control"; my_distance <- "logit"
my_caliper <- 0.20; bootstrap_numb <- 300
my_method <- "nearest"; county_fixed_effects <- T

source("./helperCode/causal_effect_estimation/dd_with_matchBoot_function.R")
matching_variables <- c("census_2000_pblack", "census_2000_pfemale",
                        "census_2000_pprofessionaldegree", "census_2000_ppop_over65",
                        "census_2000_pbachelors", "census_2000_AverageIncome",
                        "census_2000_residence_samehouse1995")

#first did to test things out and check the balance
source("./helperCode/causal_effect_estimation/dd_with_matchBoot_function.R")
RESULT_bootSDs_housingprices <- dd_with_dd(my_data=my_data[!(my_data$STATEFP10 %in% 39), ],
                                           treatment_basis_name=treatment_basis_name,
                                           control_basis_name=control_basis_name,
                                           outcome_time1_name="census_2000_AverageHomePrice",
                                           outcome_time2_name="census_2009_AverageHomePrice",
                                           matching_variables  = matching_variables,
                                           treatment_basis_treated_max = inclusion_rule,
                                           treatment_basis_control_min = exclusion_rule,
                                           control_basis_control_max = inclusion_rule,
                                           control_basis_treated_min = exclusion_rule,
                                           my_replace = my_replace, my_discard = my_discard,
                                           start_browser = F, my_distance = my_distance,
                                           my_caliper = my_caliper, bootstrap_numb = bootstrap_numb,
                                           my_method = my_method,boostrap_SDs = T, match=T)
#write.csv(RESULT_bootSDs_housingprices$matched_dataset, file ="./EZPass_matched_dataset.csv")
did_housing <- c(RESULT_bootSDs_housingprices$lower,
                 RESULT_bootSDs_housingprices$point_est,
                 RESULT_bootSDs_housingprices$upper)

matched_dataset <- RESULT_bootSDs_housingprices$matched_dataset[[1]]
standardize_diff_vec <- c()
name_vec <- c()
counter_vec <- diff_sd_vec <- raw_diff_vec <- c()
mean_T_vec <- mean_C_vec <- sd_T_vec <- sd_C_vec <- overall_mean_vec <- overall_sd_vec <- c()
success_counter <- 0
for(i in 1:(ncol(matched_dataset)) )
{
  success_counter <- success_counter + 1
  mean_T_vec[i] <- mean_T <- mean(f2n(matched_dataset[matched_dataset$treated_indicator==1,i]), na.rm=T)
  sd_T_vec[i] <- sd(f2n(matched_dataset[matched_dataset$treated_indicator==1,i]), na.rm=T)
  mean_C_vec[i] <- mean_C <- mean(f2n(matched_dataset[matched_dataset$treated_indicator==0,i]), na.rm=T)
  sd_C_vec[i] <- sd(f2n(matched_dataset[matched_dataset$treated_indicator==0,i]), na.rm=T)

  overall_mean_vec[i] <- mean(f2n(matched_dataset[,i]), na.rm=T)
  overall_sd_vec[i] <- sd(f2n(matched_dataset[,i]), na.rm=T)

  sd_T <- se(f2n(matched_dataset[matched_dataset$treated_indicator==1,i]) ) ; sd_C <- se(f2n(matched_dataset[matched_dataset$treated_indicator==0,i]) )

  n_T <- sum( matched_dataset$treated_indicator==1, na.rm = T);  n_C <- sum( matched_dataset$treated_indicator==0, na.rm = T )

  #storage
  var_diff <- sd_T^2 + sd_C^2
  standardize_diff_vec[success_counter] <- (mean_T - mean_C) / sqrt( var_diff )
  counter_vec[success_counter] <- success_counter
  name_vec[success_counter] <- colnames(matched_dataset)[i]
  raw_diff_vec[success_counter] <- mean_T - mean_C
  diff_sd_vec[success_counter] <- sqrt(var_diff)
}

ggdata <- as.data.frame(cbind(standardize_diff_vec, name_vec,
                              counter_vec, raw_diff_vec, diff_sd_vec,
                              mean_T_vec, mean_C_vec,
                              sd_T_vec, sd_C_vec,
                              overall_mean_vec, overall_sd_vec ))
ggdata <- ggdata[-c(10:13), ] [-c(1,9), ]
ggdata$name_vec <- c("Average income", "% bachelors",  "% black", "% professional degree",
                     "% female", "% of pop. over 65", "% in same house as in '95")

ggdata <- as.data.frame( apply(ggdata, 2, function(x) as.character( x ) ), stringsAsFactors = F )
stargazer_input_df <- as.matrix(cbind(ggdata$name_vec,
                                      ggdata$overall_mean_vec, ggdata$overall_sd_vec,
                                      ggdata$mean_T_vec, ggdata$sd_T_vec,
                                      ggdata$mean_C_vec, ggdata$sd_C_vec,
                                      ggdata$raw_diff_vec, ggdata$diff_sd_vec))
stargazer_input_df[-1,-1] <- apply(stargazer_input_df[-1,-1], 2, function(x) 100*f2n(x))
colnames(stargazer_input_df) <- c("",
                                  "Mean", "(S.D.)",
                                  "Mean", "(S.D.)",
                                  "Mean", "(S.D.)",
                                  "Dif.", "(S.D)")
row.names(stargazer_input_df) <- NULL
stargazer_input_df[,-1] <- round(f2n(stargazer_input_df[,-1]), 2)
stargazer_input_df[1,-c(1,3,5,7,9)] <- sprintf("$%s", stargazer_input_df[1,-c(1,3,5,7,9)])
stargazer_input_df[,c(3,5,7, 9)] <- sprintf("(%s)", stargazer_input_df[,c(3,5,7, 9)])
#stargazer_input_df[-c(1),c(2,4,6, 8)] <- sprintf("%s", stargazer_input_df[-c(1),c(2,4,6,8)])

stargazer( stargazer_input_df,
           font.size = "footnotesize",
           label = "balance_table",
           title = sprintf("Pre-treatment balance. Data from the 2000 Census.
                           Sample size: %s treated and %s control units.",
                           sum(matched_dataset$treated_indicator==1),
                           sum(matched_dataset$treated_indicator==0) ) )

#main DID results for vote share change, 2000 to 2004
source("./helperCode/causal_effect_estimation/dd_with_BootPreMatch_function.R")
source("./helperCode/causal_effect_estimation/dd_with_matchBoot_function.R")
set.seed(51)
MainResults_mat_00_to_04_full <- did_main_four_calcs(input_df = my_data[! (my_data$STATEFP10==39), ],
                                       outcome_time1_name = "USPP2000",
                                       outcome_time2_name = "USPP2004")
MainResults_mat_00_to_04 <- MainResults_mat_00_to_04_full[[1]]
stargazer_df <- matrix(NA, nrow = 2, 4)
stargazer_df[,1] <- c("Baseline model", "With covariate adjustment")
stargazer_df[,2] <- MainResults_mat_00_to_04[1:2,2]
stargazer_df[,3] <- MainResults_mat_00_to_04[c(1:2), 3]
stargazer_df[,4] <- MainResults_mat_00_to_04[c(3:4), 3]
colnames(stargazer_df) <- c("", "Estimate", "(Bootstrap S.D.)", "(Block Bootstrap S.D.)")
stargazer_df[,-1] <- apply(stargazer_df[,-1], 2, function(x) round(100 * f2n(x), 2) )
stargazer_df[,3:4] <- apply(stargazer_df[,3:4], 2, function(x) sprintf("(%s)", x) )
stargazer_df_voteshare_00_to_04 <- stargazer_df
stargazer(stargazer_df_voteshare_00_to_04,
          font.size = "footnotesize",
          label = "voteshare_did",
          title = "Difference-in-difference results for change in Democratic vote share.
          Matching/control variables include precinct-level covariates on average income,
          percent of residents living in the same house as in 1995, percent of residents who are black,
          percent of residents who are over the age of 65, percent of residents with a bachelor's degree. All matching/control
          data are from the US census.")

#main did for vote share change, 2000 to 2008
MainResults_mat_00_to_08 <- MainResults_mat_00_to_08_full <- did_main_four_calcs(input_df = my_data[! (my_data$STATEFP10==39), ],
                                       outcome_time1_name = "USPP2000",
                                       outcome_time2_name = "USPP2008")
MainResults_mat_00_to_08 <- MainResults_mat_00_to_08[[1]]
stargazer_df <- matrix(NA, nrow = 2, 4)
stargazer_df[,1] <- c("Baseline model", "With covariate adjustment")
stargazer_df[,2] <- MainResults_mat_00_to_08[1:2,2]
stargazer_df[,3] <- MainResults_mat_00_to_08[c(1:2), 3]
stargazer_df[,4] <- MainResults_mat_00_to_08[c(3:4), 3]
colnames(stargazer_df) <- c("", "Estimate", "(Bootstrap S.D.)", "(Block Bootstrap S.D.)")
stargazer_df[,-1] <- apply(stargazer_df[,-1], 2, function(x) round(100 * f2n(x), 2) )
stargazer_df[,3:4] <- apply(stargazer_df[,3:4], 2, function(x) sprintf("(%s)", x) )
stargazer_df_voteshare_00_to_08 <- stargazer_df
stargazer(stargazer_df_voteshare_00_to_08,
          font.size = "footnotesize",
          label = "voteshare_did",
          title = "Difference-in-difference results for change in Democratic vote share.
          Matching/control variables include precinct-level covariates on average income,
          percent of residents living in the same house as in 1995, percent of residents who are black,
          percent of residents who are over the age of 65, percent of residents with a bachelor's degree. All matching/control
          data are from the US census.")

#DID with commute times
MainResults_mat_commute_full <- MainResults_mat_commute <- did_main_four_calcs(input_df = my_data[! (my_data$STATEFP10==39), ],
                    outcome_time1_name = "census_2000_AverageCommuteTime",
                    outcome_time2_name = "census_2009_AverageCommuteTime")
MainResults_mat_commute = MainResults_mat_commute[[1]]
(f2n(MainResults_mat_commute[4,2])) /median( f2n(my_data[! (my_data$STATEFP10==39), ]$census_2000_AverageCommuteTime),na.rm=T)

#main did for housing values
MainResults_mat_housing_full <- MainResults_mat_housing <- did_main_four_calcs(input_df = my_data[! (my_data$STATEFP10==39), ],
                                       outcome_time1_name = "census_2000_AverageHomePrice",
                                       outcome_time2_name = "census_2009_AverageHomePrice")
#inflation: 24.6\% between 2000 and 2009
inflation_rate = 0.246
MainResults_mat_housing <- MainResults_mat_housing[[1]]
MainResults_mat_housing[,-1] = apply(MainResults_mat_housing[,-1],2,f2n)*(1-inflation_rate)#inflation adjustment
stargazer_df <- matrix(NA, nrow = 2, 4)
stargazer_df[,1] <- c("Baseline model", "With covariate adjustment")
stargazer_df[,2] <- MainResults_mat_housing[1:2,2]
stargazer_df[,3] <- MainResults_mat_housing[c(1:2), 3]
stargazer_df[,4] <- MainResults_mat_housing[c(3:4), 3]
colnames(stargazer_df) <- c("", "Estimate", "(Bootstrap S.D.)", "(Block Bootstrap S.D.)")
stargazer_df[,-1] <- apply(stargazer_df[,-1], 2, function(x) round(f2n(x), 0) )
stargazer_df[,3:4] <- apply(stargazer_df[,3:4], 2, function(x) sprintf("(%s)", x) )
stargazer_df[,2] <- sprintf("$%s", stargazer_df[,2])
stargazer_df_homeprice <- stargazer_df
stargazer(stargazer_df_homeprice,
          font.size = "footnotesize",
          label = "homeprice_did",
          title = "Difference-in-difference results for change in average home price.
          Matching/control variables include precinct-level covariates on average income,
          percent of residents living in the same house as in 1995, percent of residents who are black,
          percent of residents who are over the age of 65, percent of residents with a bachelor's degree. All matching/control
          data are from the US census.")

#main did for dem cash share
set.seed(4)
my_data_with_campaign_finance_reduced_for_did <- my_data_with_campaign_finance[!my_data_with_campaign_finance$STATEFP10 %in% 39,]
MainResults_mat_cash_full <- MainResults_mat_cash <- did_main_four_calcs(input_df = my_data_with_campaign_finance_reduced_for_did,
                                       outcome_time1_name = "CampaignFinance_SUM_CASH_DemShare_2000",
                                       outcome_time2_name = "CampaignFinance_SUM_CASH_DemShare_2004")
MainResults_mat_cash = MainResults_mat_cash[[1]]
MainResults_mat_cash[,-1] = apply(MainResults_mat_cash[,-1],2,f2n)*(1-inflation_rate)#inflation adjustment
stargazer_df <- matrix(NA, nrow = 2, 4)
stargazer_df[,1] <- c("Baseline model", "With covariate adjustment")
stargazer_df[,2] <- MainResults_mat_cash[1:2,2]
stargazer_df[,3] <- MainResults_mat_cash[c(1:2), 3]
stargazer_df[,4] <- MainResults_mat_cash[c(3:4), 3]
colnames(stargazer_df) <- c("", "Estimate", "(Bootstrap S.D.)", "(Block Bootstrap S.D.)")
stargazer_df[,-1] <- apply(stargazer_df[,-1], 2, function(x) round(100*f2n(x), 2) )
stargazer_df[,3:4] <- apply(stargazer_df[,3:4], 2, function(x) sprintf("(%s)", x) )
stargazer_df_demcash_share <- stargazer_df
stargazer(stargazer_df_demcash_share,
          font.size = "footnotesize",
          label = "homeprice_did",
          title = "Difference-in-difference results for change in Democratic cash share.
          Matching/control variables include precinct-level covariates on average income,
          percent of residents living in the same house as in 1995, percent of residents who are black,
          percent of residents who are over the age of 65, percent of residents with a bachelor's degree. All matching/control
          data are from the US census.")

#stationary contributors
source("./helperCode/causal_effect_estimation/dd_with_matchBoot_function.R")
source("./helperCode/master_helper_functions.R")
my_data_with_campaign_finance$ChangeIn_contribute_dem_pres_candidate_by_precinct <- my_data_with_campaign_finance$contribute_dem_pres_candidate_by_precinct_2004 - my_data_with_campaign_finance$contribute_dem_pres_candidate_by_precinct_2000
#my_data_with_campaign_finance_reduced_only_precincts_with_change_in_outcome <- my_data_with_campaign_finance[my_data_with_campaign_finance$ChangeIn_contribute_dem_pres_candidate_by_precinct != 0 & !(my_data_with_campaign_finance$STATEFP10 %in% 39), ]
my_data_with_campaign_finance_reduced_only_precincts_with_change_in_outcome <- my_data_with_campaign_finance[ !is.na(my_data_with_campaign_finance$ChangeIn_contribute_dem_pres_candidate_by_precinct) & !(my_data_with_campaign_finance$STATEFP10 %in% 39), ]
set.seed(1)
MainResults_mat_stationary_full <- MainResults_mat_stationary <- did_main_four_calcs(input_df = my_data_with_campaign_finance_reduced_only_precincts_with_change_in_outcome, #my_data_with_campaign_finance[ !(my_data_with_campaign_finance$STATEFP10 %in% 39),], #my_data_with_campaign_finance_reduced_for_did,
                                       outcome_time1_name = "contribute_dem_pres_candidate_by_precinct_2000",
                                       outcome_time2_name = "contribute_dem_pres_candidate_by_precinct_2004",
                                       return_confint = T)
MainResults_mat_stationary <- MainResults_mat_stationary[[1]]
MainResults_mat_stationary[,-1] = apply(MainResults_mat_stationary[,-1],2,f2n)*(1-inflation_rate)#inflation adjustment
lower_upper_BlockWithControls <- MainResults_mat_stationary_full$lower_upper_BlockWithControls
lower_upper_BlockWithoutControls <- MainResults_mat_stationary_full$lower_upper_BlockWithoutControls
MainResults_mat_stationary <- MainResults_mat_stationary_full$MainResults_mat
stargazer_df <- matrix(NA, nrow = 2, 4)
stargazer_df[,1] <- c("Baseline model", "With covariate adjustment")
stargazer_df[,2] <- MainResults_mat_stationary[1:2,2]
stargazer_df[,3] <- MainResults_mat_stationary[c(1:2), 3]
stargazer_df[,4] <- MainResults_mat_stationary[c(3:4), 3]
colnames(stargazer_df) <- c("", "Estimate", "(Bootstrap S.D.)", "(Block Bootstrap S.D.)")
stargazer_df[,-1] <- apply(stargazer_df[,-1], 2, function(x) round(100*f2n(x), 2) )
stargazer_df[,3:4] <- apply(stargazer_df[,3:4], 2, function(x) sprintf("(%s)", x) )
stargazer_df_stationary_contributors <- stargazer_df
stargazer(stargazer_df_stationary_contributors,
          font.size = "footnotesize",
          label = "stationary_contributors_did",
          title = "Difference-in-difference results for change in average support for
          Democratic presidential candidate among stationary contributors. Matching/control variables include
          precinct-level covariates on average income,
          percent of residents living in the same house as in 1995, percent of residents who are black,
          percent of residents who are over the age of 65, percent of residents with a bachelor's degree. All matching/control
          data are from the US census.")


#community change analysis
comparisons_matrix <- cbind(c("census_2000_AverageIncome",
                              "POP2000",
                              "turnout_00",
                              "census_2000_pbachelors",
                              "census_2000_pblack"),
                            c("census_2009_AverageIncome",
                              "POP2007",
                              "turnout_04",
                              "census_2009_pbachelors",
                              "census_2009_pblack") )
CI_mat <- matrix(NA, nrow = nrow(comparisons_matrix), ncol = 5)
set.seed(142)
for(row_i in 1:nrow(comparisons_matrix))
{
  print( row_i )
  source("./helperCode/causal_effect_estimation/dd_with_BootPreMatch_function.R")
  source("./helperCode/causal_effect_estimation/dd_with_matchBoot_function.R")
  RESULT_bootSDs_WithoutAdjust <- dd_with_dd(my_data=my_data[! (my_data$STATEFP10==39), ],
                                             treatment_basis_name=treatment_basis_name,
                                             control_basis_name=control_basis_name,
                                             outcome_time1_name=comparisons_matrix[row_i,1],
                                             outcome_time2_name=comparisons_matrix[row_i,2],
                                             matching_variables  = matching_variables,
                                             treatment_basis_treated_max = inclusion_rule,
                                             treatment_basis_control_min = exclusion_rule,
                                             control_basis_control_max = inclusion_rule,
                                             control_basis_treated_min = exclusion_rule,
                                             my_replace = my_replace, my_discard = my_discard,
                                             use_controls = F,  block = T,
                                             start_browser = F, my_distance = my_distance, my_caliper = my_caliper,
                                             bootstrap_numb = bootstrap_numb, my_method = my_method,boostrap_SDs = T, match=T)
  RESULT_bootSDs_WithAdjust <- dd_with_dd(my_data=my_data[! (my_data$STATEFP10==39), ],
                                          treatment_basis_name=treatment_basis_name,
                                          control_basis_name=control_basis_name,
                                          outcome_time1_name=comparisons_matrix[row_i,1],
                                          outcome_time2_name=comparisons_matrix[row_i,2],
                                          matching_variables  = matching_variables,
                                          treatment_basis_treated_max = inclusion_rule,
                                          treatment_basis_control_min = exclusion_rule,
                                          control_basis_control_max = inclusion_rule,
                                          control_basis_treated_min = exclusion_rule,
                                          my_replace = my_replace, my_discard = my_discard,
                                          use_controls = T,  block = T,
                                          start_browser = F, my_distance = my_distance, my_caliper = my_caliper,
                                          bootstrap_numb = bootstrap_numb, my_method = my_method, boostrap_SDs = T, match=T)
  CI_mat[row_i,] <- c(comparisons_matrix[row_i,1],
                      RESULT_bootSDs_WithoutAdjust$point_est, RESULT_bootSDs_WithoutAdjust$boot_sd,
                      RESULT_bootSDs_WithAdjust$point_est, RESULT_bootSDs_WithAdjust$boot_sd)
}
my_CI_mat <- CI_mat_original <- as.data.frame( CI_mat )
my_CI_mat[,1] <- c("Average income",
                "Population",
                "Turnout",
                "Percent with bachelors",
                "Percent black")
my_CI_mat[,-1] <- apply(my_CI_mat[,-1], 2, as.character)
my_CI_mat[-c(1:2),-1] <- apply(my_CI_mat[-c(1:2),-1], 2, function(x) round(100*f2n(x), 2) )
my_CI_mat[c(1:2),-1] <- apply(my_CI_mat[c(1:2),-1], 2, function(x) round(f2n(x), 2) )
stargazer_df <- my_CI_mat
colnames(stargazer_df) <- c("", "Estimate", "(Block Bootstrap S.D.)", "Estimate", "(Block Bootstrap S.D.)")
stargazer_df[,c(3,5)] <- apply(stargazer_df[,c(3,5)], 2, function(x) sprintf("(%s)", as.character(x)))
stargazer_df_CI_mat <- stargazer_df
stargazer( as.matrix( stargazer_df_CI_mat ) ,
           label = "alt_explanations",
           font.size = "footnotesize",
           title = c("Difference-in-difference results for other key variables.") )

#placebo diff-in-diff
source("./helperCode/causal_effect_estimation/dd_with_matchBoot_function.R")
MainResults_mat_placebo_full <- MainResults_mat_placebo <- did_main_four_calcs(input_df = my_data[(my_data$STATEFP10 %in% 39), ],
                                       outcome_time1_name = "census_2000_AverageHomePrice",
                                       outcome_time2_name = "census_2009_AverageHomePrice")
MainResults_mat_placebo = MainResults_mat_placebo[[1]]
stargazer_df <- matrix(NA, nrow = 2, 4)
stargazer_df[,1] <- c("Baseline model", "With covariate adjustment")
stargazer_df[,2] <- MainResults_mat_placebo[1:2,2]
stargazer_df[,3] <- MainResults_mat_placebo[c(1:2), 3]
stargazer_df[,4] <- MainResults_mat_placebo[c(3:4), 3]
colnames(stargazer_df) <- c("", "Estimate", "(Bootstrap S.D.)", "(Block Bootstrap S.D.)")
stargazer_df[,-1] <- apply(stargazer_df[,-1], 2, function(x) round(f2n(x), 0) )
stargazer_df[,3:4] <- apply(stargazer_df[,3:4], 2, function(x) sprintf("(%s)", x) )
stargazer_df[,2] <- sprintf("$%s", stargazer_df[,2])
stargazer_df_placebo_homeprice <- stargazer_df
stargazer(stargazer_df_placebo_homeprice,
          font.size = "footnotesize",
          label = "homeprice_did",
          title = "Placebo analysis. Difference-in-difference results for change in average home price for Ohio.
          Matching/control variables include precinct-level covariates on average income,
          percent of residents living in the same house as in 1995, percent of residents who are black,
          percent of residents who are over the age of 65, percent of residents with a bachelor's degree. All matching/control
          data are from the US census.")

source("./helperCode/causal_effect_estimation/dd_with_matchBoot_function.R")
MainResults_mat_placebo2_full <- MainResults_mat_placebo2 <- did_main_four_calcs(input_df = my_data[(my_data$STATEFP10 %in% 39), ],
                                       outcome_time1_name = "USPP2000",
                                       outcome_time2_name = "USPP2004")
MainResults_mat_placebo2 <- MainResults_mat_placebo2[[1]]
stargazer_df <- matrix(NA, nrow = 2, 4)
stargazer_df[,1] <- c("Baseline model", "With covariate adjustment")
stargazer_df[,2] <- MainResults_mat_placebo2[1:2,2]
stargazer_df[,3] <- MainResults_mat_placebo2[c(1:2), 3]
stargazer_df[,4] <- MainResults_mat_placebo2[c(3:4), 3]
colnames(stargazer_df) <- c("", "Estimate", "(Bootstrap S.D.)", "(Block Bootstrap S.D.)")
stargazer_df[,-1] <- apply(stargazer_df[,-1], 2, function(x) round(100 * f2n(x), 2) )
stargazer_df[,3:4] <- apply(stargazer_df[,3:4], 2, function(x) sprintf("(%s)", x) )
stargazer_df_placebo_voteshare <- stargazer_df
stargazer(stargazer_df_placebo_voteshare ,
          font.size = "footnotesize",
          label = "voteshare_did",
          title = "Placebo analysis. Difference-in-difference results for change in Democratic vote share for Ohio.
          Matching/control variables include precinct-level covariates on average income,
          percent of residents living in the same house as in 1995, percent of residents who are black,
          percent of residents who are over the age of 65, percent of residents with a bachelor's degree. All matching/control
          data are from the US census.")


source("./helperCode/causal_effect_estimation/dd_with_matchBoot_function.R")
MainResults_mat_placebo3_full <- MainResults_mat_placebo3 <- did_main_four_calcs(input_df = my_data[!(my_data$STATEFP10 %in% 39), ],
                                                outcome_time1_name = "USPP2004",
                                                outcome_time2_name = "USPP2008")
MainResults_mat_placebo3 <- MainResults_mat_placebo3[[1]]

################
#Final IV analysis
################
library(gtools)
threshold <- 30
my_data_with_campaign_finance_within_caliper <- my_data_with_campaign_finance[f2n(my_data_with_campaign_finance$EZPlaza_TravelDistOptimizedDist_Total_Leng) < threshold,]
my_data_with_campaign_finance_within_caliper$EZPlaza_TravelDistOptimizedDist_Total_Leng <- f2n(my_data_with_campaign_finance_within_caliper$EZPlaza_TravelDistOptimizedDist_Total_Leng)
my_data_with_campaign_finance_within_caliper$ChangeIn_contribute_dem_pres_candidate_by_precinct <- (my_data_with_campaign_finance_within_caliper$contribute_dem_pres_candidate_by_precinct_2004 - my_data_with_campaign_finance_within_caliper$contribute_dem_pres_candidate_by_precinct_2000)
my_data_with_campaign_finance_within_caliper$gain_in_demsupport_stationary <- 1 * (my_data_with_campaign_finance_within_caliper$ChangeIn_contribute_dem_pres_candidate_by_precinct > 0)
my_data_with_campaign_finance_within_caliper$DemVoteChange2000_to_2008 <- f2n(my_data_with_campaign_finance_within_caliper$USPP2008) - f2n(my_data_with_campaign_finance_within_caliper$USPP2000)
outcome_var_name <- "gain_in_demsupport_stationary"

dist_quantiles <- quantcut(my_data_with_campaign_finance_within_caliper$EZPlaza_TravelDistOptimizedDist_Total_Leng,
                           q = 20)

#MAIN PLOT
pdf("./IV_graphs.pdf", width = 14, height = 7)
source("./helperCode/IVPlot.R")
dev.off()

my_data$census_RelativeHousingPriceGainScore_inflationAdj <- (f2n(my_data$census_HousingPriceGainScore_inflationAdj) * 100000) / f2n(my_data$census_2000_AverageHomePrice)
my_data$census_RelativeHousingPriceGainScore_inflationAdj[which(is.infinite(my_data$census_RelativeHousingPriceGainScore_inflationAdj))] <- NA
causal_var_name <- "census_RelativeHousingPriceGainScore_inflationAdj"
treatment_basis_name <- "EZPlaza_TravelDistOptimizedDist_Total_Leng"

#IV 1 - democratic vote share
outcome_var_name <- "DemVoteChange2000_to_2004"
vars_to_numeric <- c(outcome_var_name, causal_var_name, treatment_basis_name, control_basis_name, matching_variables)
FixedEffect_vars <- c("COUNTYFP10", "STATEFP10")
my_data$DemVotePercentChange2000_to_2004 <- ( as.numeric(as.character( my_data$USPP2004 ))  - as.numeric( as.character( my_data$USPP2000 ) ) )
my_data$DemVotePercentChange2000_to_2004[abs(my_data$DemVotePercentChange2000_to_2004) %in% Inf] <- max(my_data$DemVotePercentChange2000_to_2004[my_data$DemVotePercentChange2000_to_2004<Inf], na.rm=T)
my_data_reduced <- my_data[!my_data$STATEFP10 %in% 39,]
threshold <- 12
my_data_caliper_reduced <- my_data_reduced[ f2n(my_data_reduced[,treatment_basis_name]) < threshold, ]
my_data_caliper_reduced[,vars_to_numeric] <- apply(my_data_caliper_reduced[,vars_to_numeric], 2, f2n)
source("./helperCode/master_helper_functions.R")
vote_share_IV <- IV_analysis(input_df = my_data_caliper_reduced,
                             outcome_var_name = outcome_var_name,
                             causal_var_name = causal_var_name,
                             treatment_basis_name = treatment_basis_name,
                             control_variables = matching_variables)

#vote share - 2004 to 2008
my_data$DemVotePercentChange2004_to_2008 <- ( as.numeric(as.character( my_data$USPP2008 ))  - as.numeric( as.character( my_data$USPP2004 ) ) )
my_data$DemVotePercentChange2004_to_2008[abs(my_data$DemVotePercentChange2004_to_2008) %in% Inf] <- max(my_data$DemVotePercentChange2004_to_2008[my_data$DemVotePercentChange2004_to_2008<Inf], na.rm=T)
my_data_reduced <- my_data[!my_data$STATEFP10 %in% 39,]
myvars_to_numeric <- unique( c(vars_to_numeric ,  treatment_basis_name)  )
my_data_reduced[,myvars_to_numeric] <- apply(my_data_reduced[, myvars_to_numeric], 2, f2n)
my_data_caliper_reduced <- my_data_reduced[ my_data_reduced[,treatment_basis_name] < threshold, ]
outcome_var_name <- "DemVotePercentChange2004_to_2008"
vote_share_IV_2008 <- IV_analysis(input_df = my_data_caliper_reduced,
                             outcome_var_name = outcome_var_name,
                             causal_var_name = causal_var_name,
                             treatment_basis_name = treatment_basis_name,
                             control_variables = c(matching_variables))#"minNetworkOptimizedDistance_Distance_nonEZ_hwy"))

#cash share
my_data_with_campaign_finance_caliper_reduced <- my_data_with_campaign_finance[! my_data_with_campaign_finance$STATEFP10 %in% 39, ]
my_data_with_campaign_finance_caliper_reduced <-  my_data_with_campaign_finance_caliper_reduced[ f2n(my_data_with_campaign_finance_caliper_reduced[,treatment_basis_name]) < threshold, ]
outcome_var_name <- "ChangeIn_CampaignFinance_SUM_CASH_DemShare"
to_numeric_varnames <- c(vars_to_numeric, outcome_var_name)
#my_data_with_campaign_finance_caliper_reduced[,to_numeric_varnames] <- apply(my_data_with_campaign_finance_caliper_reduced[,to_numeric_varnames], 2, function(x) f2n(x))
#cash_share_IV <- IV_analysis(input_df = my_data_with_campaign_finance_caliper_reduced,
                             #outcome_var_name = outcome_var_name,
                             #causal_var_name = causal_var_name,
                             #treatment_basis_name = treatment_basis_name,
                             #control_variables = c(matching_variables))#"minNetworkOptimizedDistance_Distance_nonEZ_hwy"))

#stationary contributor support
my_data_with_campaign_finance$ChangeIn_contribute_dem_pres_candidate_by_precinct <- (my_data_with_campaign_finance$contribute_dem_pres_candidate_by_precinct_2004 - my_data_with_campaign_finance$contribute_dem_pres_candidate_by_precinct_2000)
my_data_with_campaign_finance$gain_in_demsupport_stationary <- my_data_with_campaign_finance$ChangeIn_contribute_dem_pres_candidate_by_precinct#1 * (my_data_with_campaign_finance$ChangeIn_contribute_dem_pres_candidate_by_precinct > 0)
my_data_with_campaign_finance_reduced <- my_data_with_campaign_finance[!(my_data_with_campaign_finance$STATEFP10 %in% 39), ]
my_data_with_campaign_finance_caliper_reduced <- my_data_with_campaign_finance_reduced[f2n(my_data_with_campaign_finance_reduced[,treatment_basis_name]) <  threshold, ]
outcome_var_name <- "gain_in_demsupport_stationary"

set.seed(0)
RESULT_bootSDs_individual <- dd_with_dd(my_data = my_data_with_campaign_finance_reduced_only_precincts_with_change_in_outcome, #my_data_with_campaign_finance[ !(my_data_with_campaign_finance$STATEFP10 %in% 39),],
                                        treatment_basis_name=treatment_basis_name,
                                        control_basis_name=control_basis_name,
                                        outcome_time1_name="contribute_dem_pres_candidate_by_precinct_2000",
                                        outcome_time2_name="contribute_dem_pres_candidate_by_precinct_2004",
                                        matching_variables  = matching_variables,
                                        treatment_basis_treated_max = inclusion_rule,
                                        treatment_basis_control_min = exclusion_rule,
                                        control_basis_control_max = inclusion_rule,
                                        control_basis_treated_min = exclusion_rule, my_replace = my_replace,
                                        my_discard = my_discard, start_browser = F, use_controls = T,
                                        block = T,  my_distance = my_distance, my_caliper = my_caliper,
                                        bootstrap_numb = 2 * bootstrap_numb,
                                        my_method = my_method,#"nearest",
                                        boostrap_SDs = T,  match=T)
c(RESULT_bootSDs_individual$point_est, RESULT_bootSDs_individual$boot_sd)
#RESULT_bootSDs_individual$lower; RESULT_bootSDs_individual$upper
#contribute_dem_pres_candidate_by_precinct_2004 gives good results
matched_df <- as.data.frame( RESULT_bootSDs_individual$matched_dataset ); matched_df$ChangeIn_metric <- matched_df$contribute_dem_pres_candidate_by_precinct_2004 - matched_df$contribute_dem_pres_candidate_by_precinct_2000
treat_x <- matched_df$ChangeIn_metric[matched_df$treated_indicator == 1]
control_y <- matched_df$ChangeIn_metric[matched_df$treated_indicator == 0]
treat_x <- treat_x[treat_x!=0]; mean(treat_x)
control_y <- control_y[control_y!=0]; mean(control_y)

pdf("./empirical_cdf.pdf")
plot(sort(treat_x),
     1:length(treat_x) / length(treat_x),
     main = c(sprintf("Analysis of Stationary Contributors \n (Wilcox test p-value: %s)", round(wilcox.test(x = treat_x, y = control_y)$p.value,3) )),
     xlab = c("Change in Average Democratic Support"),
     ylab = c("Empirical Cumulative Probability"),
     type = "b",
     pch = 4,
     lty = 1,
     col = "black")
points(sort(control_y),
       1:length(control_y) / length(control_y),
       type = "b",
       pch = 1,
       lty = 3,
       col = "darkgray")
legend("topleft",
       legend = c("Treated Precincts", "Control Precincts"),
       pch = c(4, 1),
       lty = c(1, 3),
       col = c("black", "darkgray"))
dev.off()
wilcox.test(x = treat_x, y = control_y)#t.test(x = treat_x, y = control_y, var.equal = F)



#######
#Ohio analysis, 2008 to 2012
source("./helperCode/causal_effect_estimation/dd_with_BootPreMatch_function.R")
source("./helperCode/causal_effect_estimation/dd_with_matchBoot_function.R")
my_data_OH <- my_data[(my_data$STATEFP10==39), ]
source("./helperCode/causal_effect_estimation/dd_with_matchBoot_function.R")

matching_variables_OH2008 <- gsub(matching_variables, pattern = "2000", replace = "2009")
matching_variables_OH2008 <- matching_variables_OH2008[matching_variables_OH2008 %in% colnames(my_data_OH)]

set.seed(10)
source("./helperCode/causal_effect_estimation/dd_with_matchBoot_function.R")
RESULT_bootSDs_individual_block <- dd_with_dd(my_data = my_data_OH, #my_data_with_campaign_finance[ !(my_data_with_campaign_finance$STATEFP10 %in% 39),],
                                        treatment_basis_name=treatment_basis_name,
                                        control_basis_name=control_basis_name,
                                        outcome_time1_name="USPP2008",
                                        outcome_time2_name="USPP2012",
                                        matching_variables  = matching_variables_OH2008,
                                        treatment_basis_treated_max = inclusion_rule,
                                        treatment_basis_control_min = exclusion_rule/2, #to get more matches.
                                        control_basis_control_max = inclusion_rule,
                                        control_basis_treated_min = exclusion_rule, my_replace = F,
                                        my_discard = my_discard, start_browser = F,
                                        my_distance = my_distance,
                                        my_caliper = 0.20,
                                        bootstrap_numb = 100,
                                        my_method = my_method,
                                        my_ratio = 1,
                                        use_controls = F,  block = T,
                                        boostrap_SDs = T,  use_match = T)
RESULT_bootSDs_individual_block$point_est
RESULT_bootSDs_individual_block$boot_sd
#RESULT_bootSDs_individual_block$balance_table

matched_dataset <- as.data.frame( RESULT_bootSDs_individual_block$matched_dataset )
standardize_diff_vec <- c()
name_vec <- c()
counter_vec <- diff_sd_vec <- raw_diff_vec <- c()
mean_T_vec <- mean_C_vec <- sd_T_vec <- sd_C_vec <- overall_mean_vec <- overall_sd_vec <- c()
success_counter <- 0
for(i in 1:(ncol(matched_dataset)) )
{
  success_counter <- success_counter + 1
  mean_T_vec[i] <- mean_T <- mean(f2n(matched_dataset[matched_dataset$treated_indicator==1,i]), na.rm=T)
  sd_T_vec[i] <- sd(f2n(matched_dataset[matched_dataset$treated_indicator==1,i]), na.rm=T)
  mean_C_vec[i] <- mean_C <- mean(f2n(matched_dataset[matched_dataset$treated_indicator==0,i]), na.rm=T)
  sd_C_vec[i] <- sd(f2n(matched_dataset[matched_dataset$treated_indicator==0,i]), na.rm=T)

  overall_mean_vec[i] <- mean(f2n(matched_dataset[,i]), na.rm=T)
  overall_sd_vec[i] <- sd(f2n(matched_dataset[,i]), na.rm=T)

  sd_T <- se(f2n(matched_dataset[matched_dataset$treated_indicator==1,i]) )
  sd_C <- se(f2n(matched_dataset[matched_dataset$treated_indicator==0,i]) )

  n_T <- sum( matched_dataset$treated_indicator==1, na.rm = T)
  n_C <- sum( matched_dataset$treated_indicator==0, na.rm = T )

  #storage
  var_diff <- sd_T^2 + sd_C^2
  standardize_diff_vec[success_counter] <- (mean_T - mean_C) / sqrt( var_diff )
  counter_vec[success_counter] <- success_counter
  name_vec[success_counter] <- colnames(matched_dataset)[i]
  raw_diff_vec[success_counter] <- mean_T - mean_C
  diff_sd_vec[success_counter] <- sqrt(var_diff)
}

ggdata <- as.data.frame(cbind(standardize_diff_vec, name_vec,
                              counter_vec, raw_diff_vec, diff_sd_vec,
                              mean_T_vec, mean_C_vec,
                              sd_T_vec, sd_C_vec,
                              overall_mean_vec, overall_sd_vec ))
ggdata$name_vec <- as.character( ggdata$name_vec )
ggdata$name_vec[1:6] <- c("Average income", "% bachelors",
                     "% black",
                     "% female","% of pop. over 65",
                     "% professional degree")
ggdata <- as.data.frame( apply(ggdata, 2, function(x) as.character( x ) ),
                         stringsAsFactors = F )
ggdata <- ggdata[1:6,]
stargazer_input_df <- as.matrix(cbind(ggdata$name_vec,
                                      ggdata$overall_mean_vec, ggdata$overall_sd_vec,
                                      ggdata$mean_T_vec, ggdata$sd_T_vec,
                                      ggdata$mean_C_vec, ggdata$sd_C_vec,
                                      ggdata$raw_diff_vec, ggdata$diff_sd_vec))
stargazer_input_df[-1,-1] <- apply(stargazer_input_df[-1,-1], 2, function(x) 100*f2n(x))
colnames(stargazer_input_df) <- c("",
                                  "Mean", "(S.D.)",
                                  "Mean", "(S.D.)",
                                  "Mean", "(S.D.)",
                                  "Dif.", "(S.D)")
row.names(stargazer_input_df) <- NULL
stargazer_input_df[,-1] <- round(f2n(stargazer_input_df[,-1]), 2)
stargazer_input_df[1,-c(1,3,5,7,9)] <- sprintf("$%s", stargazer_input_df[1,-c(1,3,5,7,9)])
stargazer_input_df[,c(3,5,7, 9)] <- sprintf("(%s)", stargazer_input_df[,c(3,5,7, 9)])
#stargazer_input_df[-c(1),c(2,4,6, 8)] <- sprintf("%s", stargazer_input_df[-c(1),c(2,4,6,8)])
stargazer_input_df_ohio_balance <- stargazer_input_df
stargazer( stargazer_input_df,
           font.size = "footnotesize",
           label = "balance_table_ohio",
           title = sprintf("Pre-treatment balance for Ohio. Data from the 2010 Census.
                           Sample size: %s treated and %s control units.",
                           sum(matched_dataset$treated_indicator==1),
                           sum(matched_dataset$treated_indicator==0) ) )

stargazer_df <- matrix(NA, nrow = 1, 4)
stargazer_df[,1] <- c("Baseline model")
stargazer_df[,2] <- RESULT_bootSDs_individual_block$point_est
stargazer_df[,3] <- RESULT_bootSDs_individual_block$boot_sd
stargazer_df[,4] <- RESULT_bootSDs_individual_block$boot_sd
colnames(stargazer_df) <- c("", "Estimate", "(Bootstrap S.D.)", "(Block Bootstrap S.D.)")
stargazer_df[,-1] <- apply(t(stargazer_df[,-1]), 2, function(x) round(100 * f2n(x), 2) )
stargazer_df[,3:4] <- apply(t( stargazer_df[,3:4] ) , 2, function(x) sprintf("(%s)", x) )
stargazer_df_ohio_did <- stargazer_df
stargazer(stargazer_df,
          font.size = "footnotesize",
          label = "ohio2008_did",
          title = "Ohio supplementary analysis. Difference-in-difference results for change in average home price for Ohio.
          Matching/control variables include precinct-level covariates on average income, percent of residents who are black,
          percent of residents who are over the age of 65, percent of residents with a bachelor's degree. All matching/control
          data are from the 2010 US Census.")

#vote share graph for Ohio
causal_var_name <- "census_HousingPriceGainScore"
outcome_var_name <- "DemVotePercentChange2008_to_2012"
vars_to_numeric <- c(outcome_var_name, causal_var_name, treatment_basis_name, control_basis_name, matching_variables)
FixedEffect_vars <- c("COUNTYFP10", "STATEFP10")
my_data$DemVotePercentChange2008_to_2012 <- ( as.numeric(as.character( my_data$USPP2012 ))  - as.numeric( as.character( my_data$USPP2008 ) ) )
my_data$DemVotePercentChange2008_to_2012[abs(my_data$DemVotePercentChange2008_to_2012) %in% Inf] <- max(my_data$DemVotePercentChange2008_to_2012[my_data$DemVotePercentChange2008_to_2012<Inf], na.rm=T)
my_data_OH_reduced <- my_data[my_data$STATEFP10 %in% 39,]
#my_data_OH_caliper_reduced <- my_data_OH_reduced[ f2n(my_data_OH_reduced[,treatment_basis_name]) < 10, ]
my_data_OH_caliper_reduced <- my_data_OH_reduced[ f2n(my_data_OH_reduced[,treatment_basis_name]) < 11, ]
to_numeric_varnames <- c(outcome_var_name, causal_var_name, treatment_basis_name, matching_variables)
my_data_OH_caliper_reduced[,to_numeric_varnames] <- apply(my_data_OH_caliper_reduced[, to_numeric_varnames], 2,
                                                           function(x) f2n(x))

my_data_OH_caliper_reduced_NoNAs <- na.omit( my_data_OH_caliper_reduced[,c("EZPlaza_TravelDistOptimizedDist_Total_Leng",
                                                                  "minNetworkOptimizedDistance_Distance_nonEZ_hwy",
                                                                  "census_2009_pblack",
                                                                  "census_2009_pprofessionaldegree",
                                                                  "census_2009_pfemale",
                                                                  "County",
                                                                  "DemVotePercentChange2008_to_2012",
                                                                  "census_2009_ppop_over65",
                                                                  "census_2009_AverageIncome",
                                                                  "census_2009_pbachelors")] )

Text_DemVotePercentChange_08_12 <- "lm(DemVotePercentChange2008_to_2012~EZPlaza_TravelDistOptimizedDist_Total_Leng+
                                     County +
                                I(f2n(minNetworkOptimizedDistance_Distance_nonEZ_hwy)) +
                                I(f2n(census_2009_pblack)) +
                                I(f2n(census_2009_pprofessionaldegree)) +
                                I(f2n(census_2009_pfemale)) +
                                I(f2n(census_2009_ppop_over65)) +
                                I(f2n(census_2009_AverageIncome)) +
                                I(f2n(census_2009_pbachelors)) ,
                                data = %s)"

my_ohio_lm <-  eval(parse(text = sprintf(Text_DemVotePercentChange_08_12,
                                         "my_data_OH_caliper_reduced_NoNAs")))
clx <- function(fm, dfcw, cluster) {
  library(sandwich)
  library(lmtest)
  library(zoo)
  M <- length(unique(cluster))
  N <- length(cluster)
  dfc <- (M/(M-1))*((N-1)/(N-fm$rank)) # anpassung der freiheitsgrade
  u <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum))
  vcovCL <-dfc * sandwich (fm, meat = crossprod(u)/N) * dfcw
  coeftest(fm, vcovCL)
}
dfcw <- my_ohio_lm$df.residual / (my_ohio_lm$df.residual - (length(unique(as.character(my_data_OH_caliper_reduced_NoNAs$County))) - 1))
100 * (-10 * clx(my_ohio_lm, dfcw, as.character(my_data_OH_caliper_reduced_NoNAs$County) )[2,1:2]  )#cluster sd

unique_counties <- unique(  as.character(my_data_OH_caliper_reduced_NoNAs$County) ); coef_vec <- c()
for(i in 1:1000){
  if(i %% 100 == 0){print(i)}
  use_indices <- c()
  sampled_counties <- sample(unique_counties, length(unique_counties), replace = T)
  for(alpha in 1:length(sampled_counties)){
    use_indices <- c(use_indices, which(my_data_OH_caliper_reduced_NoNAs$County %in% sampled_counties[alpha]) )
  }
  my_data_OH_caliper_reduced_NoNAs_block <- my_data_OH_caliper_reduced_NoNAs[use_indices,]
  my_ohio_lm_boot <-  eval(parse(text = sprintf(Text_DemVotePercentChange_08_12,
                                                "my_data_OH_caliper_reduced_NoNAs_block")))
  coef_vec[i] <- my_ohio_lm_boot$coefficients[2]
}
100 * (-10 * clx(my_ohio_lm, dfcw, as.character(my_data_OH_caliper_reduced_NoNAs$County) )[2,1:2]  )#cluster sd
100 * (-10 * sd(coef_vec))#block boot sd

my_data_OH_caliper_reduced <- my_data_OH_reduced[ f2n(my_data_OH_reduced[,treatment_basis_name]) < 20, ]

pdf("./IV_graph_Ohio.pdf", width = 7, height = 7)
my_cex_title <- 1.5
my_cex <- 1.5
dist_quantiles <- quantcut(f2n(my_data_OH_caliper_reduced$EZPlaza_TravelDistOptimizedDist_Total_Leng),
                           q = 5)
par(mar=c(4, 4, 4, 4) )
scale_factor <- 100
y_use <- scale_factor * tapply(f2n(my_data_OH_caliper_reduced[,"DemVotePercentChange2008_to_2012"]), dist_quantiles, function(x) mean(x, na.rm = T) )
sd_use <-  scale_factor * tapply(f2n(my_data_OH_caliper_reduced[,"DemVotePercentChange2008_to_2012"]), dist_quantiles, function(x) se(x) )
x_use <- tapply(f2n(my_data_OH_caliper_reduced$EZPlaza_TravelDistOptimizedDist_Total_Leng), dist_quantiles, function(x) mean(x, na.rm = T))
my_lowess <- loess.smooth(x = f2n(my_data_OH_caliper_reduced$EZPlaza_TravelDistOptimizedDist_Total_Leng),
                          y = scale_factor * f2n(my_data_OH_caliper_reduced[,"DemVotePercentChange2008_to_2012"]),
                          degree = 2)
my_lowess_x <- my_lowess$x
my_lowess_y <- my_lowess$y
median_change_x <- median(f2n(my_data_OH_reduced[, "EZPlaza_TravelDistOptimizedDist_Total_Leng"]), na.rm = T)
median_change_y <- median(scale_factor * f2n(my_data_OH_reduced[, "DemVotePercentChange2008_to_2012"]), na.rm = T)
plot( x_use, y_use,
      pch = 4,
      cex = my_cex,
      lwd = 2,
      ylab = "Average Change in Dem. Vote Share",
      ylim = c(min(0, min(y_use - 2 * sd_use)), max(y_use + 2 * sd_use)),
      axes=FALSE, ann=FALSE)
axis(1, cex = my_cex); axis(2, las=1, cex = my_cex)
mtext("Miles from E-ZPass", side=1, line=3, cex = my_cex)
title(main = "Change in Dem. Vote Share from 2000 Baseline", cex.main = my_cex_title)
p <- par('usr')
text(p[2]+2.5, mean(p[3:4]), labels = "Average Change in Dem. Vote Share",
     xpd = NA, srt = -90, cex = my_cex)
box()
#points(x = my_lowess_x, y = my_lowess_y, type = "l")
segments(x0 = x_use,
         y0 = y_use - 2 * sd_use,
         x1 = x_use,
         y1 = y_use + 2 * sd_use,
         col = "gray")
dev.off()



my_data$DemVoteChange2000_to_2004_ranking <- rank(my_data$DemVoteChange2000_to_2004, na.last = "keep")
library(zoo)
thres <- 5
x <- f2n(my_data[f2n(my_data$EZPlaza_TravelDistOptimizedDist_Total_Leng)<thres,
                 treatment_basis_name])
y <- (my_data$DemVoteChange2000_to_2004[f2n(my_data[,treatment_basis_name])<thres])
smooth_y <- rollapply(y, width = 50, partial = F, by = 1,
                      FUN = function(x){mean(x, na.rm = T)})
# plot(x , y)
}
